##############################
#
# Replication file for the appendix in:
#
# Paths to Power:
# A new dataset on the social profile of governments
#
# For publication in the British Journal of Political Science
#
# Jacob Nyrup, Carl Henrik Knutsen, Peter Egge Langsæther, and Ina Lyftingsmo Kristiansen
# 
##################

### Open packages

pacman::p_load(here,tidyverse,haven,readxl,openxlsx,stringi,readr,
               hrbrthemes,scales,treemapify,RColorBrewer,viridis,extrafont,countrycode,scales,
               reactable,reactablefmtr,htmlwidgets,webshot,ggmap,ggthemes,ggplot2,WDI,xtable,
               kableExtra,vdemdata,fixest,modelsummary,huxtable,broom,etable, ggpubr, tinytable)

'%!in%' <- function(x,y)!('%in%'(x,y)) # Define function for not in

### Load data

df_panel <- read_excel("Data/PathstoPower_individuallevel_v1.0.xlsx")

df_crosssectional <- read_excel("Data/PathstoPower_countryyear_v1.0.xlsx") %>% filter(!is.na(country_isocode))

df_background <- read_rds("Data/backgroundvariables.rds") %>% mutate(year = as.character(year))

###
# Appendix D - Share missing by country and variable
###

observations <- df_panel %>% 
  filter(classification %!in% c("Representative to the United Nations","Ambassador to the United States","Governor (Central Bank)")) %>%
  arrange(country_isocode) %>%
  group_by(country_isocode) %>%
  tally()

observationseducation <- df_panel %>% 
  filter(degree %in% c("1","2","3","4")) %>%
  arrange(country_isocode) %>%
  group_by(country_isocode) %>%
  tally(name = "n_education")

observationsabroad <- df_panel %>% 
  filter(abroad=="No") %>%
  arrange(country_isocode) %>%
  group_by(country_isocode) %>%
  tally(name = "n_notabroad")

observationscodedbutshouldntbe <- df_panel %>% 
  filter(degree %in% c("1","2","3","4") & !is.na(degreetype)) %>%
  arrange(country_isocode) %>%
  group_by(country_isocode) %>%
  tally(name = "n_mistakedegreetype")

observationscodedbutshouldntbe_uni <- df_panel %>% 
  filter(degree %in% c("1","2","3","4") & !is.na(university)) %>%
  arrange(country_isocode) %>%
  group_by(country_isocode) %>%
  tally(name = "n_mistakeuni")

observationscodedbutshouldntbe_abroad <- df_panel %>% 
  filter(degree %in% c("1","2","3","4") & !is.na(abroad)) %>%
  arrange(country_isocode) %>%
  group_by(country_isocode) %>%
  tally(name = "n_mistakeabroad")

observations <- left_join(observations,observationseducation, by = c("country_isocode")) %>% 
  left_join(.,observationsabroad, by = c("country_isocode")) %>%
  left_join(.,observationscodedbutshouldntbe, by = c("country_isocode")) %>% 
  left_join(.,observationscodedbutshouldntbe_uni, by = c("country_isocode")) %>% 
  left_join(.,observationscodedbutshouldntbe_abroad, by = c("country_isocode")) %>% 
  mutate(n_education = replace_na(n_education,0),
         n_notabroad = replace_na(n_notabroad,0),
         n_mistakedegreetype = replace_na(n_mistakedegreetype,0),
         n_mistakeuni = replace_na(n_mistakeuni,0),
         n_mistakeabroad = replace_na(n_mistakeabroad,0),
         n_notabroad = n_notabroad + n_education)

sharemissing <- df_panel %>% filter(classification %!in% c("Representative to the United Nations","Ambassador to the United States","Governor (Central Bank)")) %>%
  arrange(country_isocode) %>%
  group_by(country_isocode) %>%
  summarise_all(funs(sum(is.na(.)))) %>%
  left_join(.,observations,by = c("country_isocode")) %>%
  mutate(degreetype = degreetype - n_education + n_mistakedegreetype,
         university = university - n_education + n_mistakeuni,
         abroad = abroad - n_education + n_mistakeabroad,
         abroadcountry = abroadcountry - n_notabroad + n_mistakeabroad
  ) %>%
  summarise_if(is.numeric,funs(round(./n,2))) %>%
  cbind(observations) %>%
  select("Country Isocode"=country_isocode,"Degree" = degree, "Type of degree" = degreetype,
         "University" = university,"Studied abroad" = abroad, "Country of studying abroad" = abroadcountry,
         "Place of birth" = placeofbirth, "Royal" = royal,"From a political family" = politicalfamily,
         "Class background" = classorigins, "Previous occupation" = occupation, "Background in politics" = politician) %>%
  add_row("Country Isocode" = 'Average', !!! round(colMeans(.[-1]),2)) %>%
  mutate(`Country of studying abroad` = if_else( `Country of studying abroad` < 0, 0,`Country of studying abroad`))

# Kable extra

kbl(sharemissing, booktabs = T,format = "latex", longtable = TRUE, align = 'c',caption = "Share missing \\label{tab:missing}") %>%
  kable_styling(latex_options = c("hold_position", "repeat_header","striped"),font_size = 7) %>%
  column_spec(2:12, width = "1cm") %>%
  writeLines(.,"Output/appendixd.tex")

###
# Appendix E - Testing for patterns in missing data
###

########################## Create datasets for missingness analysis########################################

missing_countryyear <- df_crosssectional# %>%

# Add GDP-data to country-year and individual dataset
gdp_data <- WDI(country = "all", indicator= "NY.GDP.PCAP.CD") %>% rename(GDPcapPPP = 5) %>% 
  subset(year >= 1966) %>% rename (country_isocode = iso3c)

missing_countryyear <- merge(missing_countryyear, gdp_data, by = c("country_isocode", "year"), all.x = TRUE)
df_panel <- merge(df_panel, gdp_data, by = c("country_isocode", "year"), all.x = TRUE)

# Add population data
pop_data <- WDI(country = "all", indicator= "SP.POP.TOTL") %>% rename(pop_tot = 5) %>% 
  subset(year >= 1966) %>% rename (country_isocode = iso3c)

missing_countryyear <- merge(missing_countryyear, pop_data, by = c("country_isocode", "year"), all.x = TRUE)
df_panel <- merge(df_panel, pop_data, by = c("country_isocode", "year"), all.x = TRUE)

rm(pop_data)

# Add polyarchy- and region-scores from the Vdem package to both datasets
Vdem <- vdem
Vdem<- subset(Vdem, year >= 1966 & year <= 2021) %>%
  rename(country_isocode = country_text_id) %>%
  select("country_isocode", "year", "v2x_polyarchy", "e_regiongeo")

missing_countryyear <- merge(missing_countryyear, Vdem, by = c("country_isocode", "year"), all.x = TRUE)
df_panel <- merge(df_panel, Vdem, by = c("country_isocode", "year"), all.x = TRUE)

rm(Vdem)

# Convert the country_year-dataset to show missingness per country per year for different variables

missing_countryyear <- missing_countryyear %>%  
  select(country_isocode, year, degree_missing, 
         placeofbirth_missing,classorigins_missing, occupation_missing, 
         politician_missing, e_regiongeo, GDPcapPPP, v2x_polyarchy, pop_tot)


# Add regions-variable to the country-year dataset

missing_countryyear <- missing_countryyear %>% mutate(europe = case_when(e_regiongeo == 1 ~ 1,
                                                                         e_regiongeo == 2 ~ 1,
                                                                         e_regiongeo == 3 ~ 1,
                                                                         e_regiongeo == 4 ~ 1,
                                                                         TRUE ~ 0),
                                                      africa = case_when(e_regiongeo == 5 ~ 1,
                                                                         e_regiongeo == 6 ~ 1,
                                                                         e_regiongeo == 7 ~ 1,
                                                                         e_regiongeo == 8 ~ 1,
                                                                         e_regiongeo == 9 ~ 1,
                                                                         TRUE ~ 0),
                                                      asia = case_when(e_regiongeo == 10 ~ 1,
                                                                       e_regiongeo == 11 ~ 1,
                                                                       e_regiongeo == 12 ~ 1,
                                                                       e_regiongeo == 13 ~ 1,
                                                                       e_regiongeo == 14 ~ 1,
                                                                       TRUE ~ 0),
                                                      oceania = case_when(e_regiongeo == 15 ~ 1,
                                                                          TRUE ~ 0),
                                                      north_am = case_when(e_regiongeo == 16 ~ 1,
                                                                           e_regiongeo == 17 ~ 1,
                                                                           e_regiongeo == 19 ~ 1,
                                                                           TRUE ~ 0),
                                                      south_am = case_when(e_regiongeo == 18 ~ 1,
                                                                           TRUE ~ 0))


# Make a dataset that shows missingness per year for different variables

missing_year <- missing_countryyear %>% 
  select(country_isocode, year, degree_missing, placeofbirth_missing,
         classorigins_missing, occupation_missing, politician_missing) %>%
  group_by(year) %>%
  summarise(degree_missing = mean(degree_missing),
            placeofbirth_missing = mean(placeofbirth_missing),
            classorigins_missing = mean(classorigins_missing),
            occupation_missing = mean(occupation_missing),
            politician_missing = mean(politician_missing))

# Make a dataset for missingness per country with different predictors

# Add individual factors to predict missingness for individual cabinet members

missing_within <- df_panel %>% 
  select(country_isocode, year, core, minister, leader, position, prestige_1, gender, degree, classorigins, 
         occupation, politician, name, e_regiongeo, v2x_polyarchy, GDPcapPPP, prestige_1) %>%
  mutate(missing_degree = ifelse(is.na(degree), 1, 0),
         missing_class = ifelse(is.na(classorigins), 1, 0),
         missing_occupation = ifelse(is.na(occupation), 1, 0),
         missing_politician = ifelse(is.na(politician), 1, 0)) %>% 
  mutate(female_dummy = case_when(gender == "Female" ~ 1,
                                  gender == "Male" ~ 0, 
                                  TRUE ~ NA)) %>% 
  mutate(prestige_high = case_when(prestige_1 == "High" ~ 1,
                                   prestige_1 != "High" ~ 0))%>%
  mutate(prestige_med = case_when(prestige_1 == "Medium" ~ 1,
                                  prestige_1 != "Medium" ~ 0))%>%
  mutate(prestige_low = case_when(prestige_1 == "Low" ~ 1,
                                  prestige_1 != "Low" ~ 0))


##########Missingness Analysis ##############################

# Country-year estimates of missingness for degree, class and occupation

cy_degree1 <- feols(degree_missing ~ v2x_polyarchy, 
                    cluster = "country_isocode", missing_countryyear)

cy_degree2 <- feols(degree_missing ~ log(GDPcapPPP), 
                    cluster = "country_isocode", missing_countryyear)

cy_degree3 <- feols(degree_missing ~ log(pop_tot), 
                    cluster = "country_isocode", missing_countryyear)

cy_degree4 <- feols(degree_missing ~  africa + asia + oceania + north_am + south_am, 
                    cluster = "country_isocode", missing_countryyear)


cy_class1 <- feols(classorigins_missing ~ v2x_polyarchy, 
                   cluster = "country_isocode", missing_countryyear)

cy_class2 <- feols(classorigins_missing ~ log(GDPcapPPP), 
                   cluster = "country_isocode", missing_countryyear)

cy_class3 <- feols(classorigins_missing ~ log(pop_tot), 
                   cluster = "country_isocode", missing_countryyear)

cy_class4 <- feols(classorigins_missing ~  africa + asia + oceania + north_am + south_am, 
                   cluster = "country_isocode", missing_countryyear)

cy_occupation1 <- feols(occupation_missing ~ v2x_polyarchy, 
                        cluster = "country_isocode", missing_countryyear)

cy_occupation2 <- feols(occupation_missing ~ log(GDPcapPPP), 
                        cluster = "country_isocode", missing_countryyear)

cy_occupation3 <- feols(occupation_missing ~ log(pop_tot), 
                        cluster = "country_isocode", missing_countryyear)

cy_occupation4 <- feols(occupation_missing ~  africa + asia + oceania + north_am + south_am, 
                        cluster = "country_isocode", missing_countryyear)

############# Make table for country-year results
models_cy <- list("(1A)" = cy_degree1,
                  "(1B)" =  cy_degree2, 
                  "(1C)" = cy_degree3, 
                  "(1D)" = cy_degree4,
                  "(2A)" = cy_class1, 
                  "(2B)" = cy_class2, 
                  "(2C)" = cy_class3, 
                  "(2D)" = cy_class4,
                  "(3A)" = cy_occupation1, 
                  "(3B)" = cy_occupation2, 
                  "(3C)" = cy_occupation3, 
                  "(3D)" = cy_occupation4)

gof_map <- data.frame(
  original = c("Num.Obs","R2"),
  new = c("Observations", "R2"),
  stringsAsFactors = FALSE
)

gm <- tibble::tribble(
  ~raw, ~clean, ~fmt,
  "nobs", "Observations", 0,
  "r.squared", "R2", 2)


modelsummary(models_cy,
                       title = "Predicting missingness for country-year data",
                       stars = c('*' = .05),
                       fmt = 2,
                       coef_map = c("v2x_polyarchy" = "Polyarchy VDEM",
                                      "log(GDPcapPPP)" = "Log (GDP per capita)",
                                      "log(pop_tot)" = "Log(Population)",
                                      "africa" = "Africa",
                                      "asia" = "Asia",
                                      "oceania" = "Oceania",
                                      "north_am" = "North America",
                                      "south_am" = "South America",
                                      "female_dummy" = "Female",
                                      "(Intercept)" = "Constant"),
                       gof_map = gm,
                       estimate = "{estimate}{stars}",
                       #output = "Output/missing_countryyear.html",
                       threeparttable=TRUE,
                       notes = list("Reference category for the continents is Europe. Standard errors are clustered by country. *p < 0.05."
                       )) 


#write(reg_cy,"Output/tablee1.tex")


################### Individual estimates

ind_degree1 <- feols(missing_degree ~ leader, 
                     cluster = "country_isocode", missing_within)

ind_degree2 <- feols(missing_degree ~ female_dummy, 
                     cluster = "country_isocode", missing_within)

ind_degree3 <- feols(missing_degree ~ prestige_high + prestige_med, 
                     cluster = "country_isocode", missing_within)


ind_class1 <- feols(missing_class ~ leader, 
                    cluster = "country_isocode", missing_within)

ind_class2 <- feols(missing_class ~ female_dummy, 
                    cluster = "country_isocode", missing_within)

ind_class3 <- feols(missing_class ~ prestige_high + prestige_med, 
                    cluster = "country_isocode", missing_within)

ind_occupation1 <- feols(missing_occupation ~ leader, 
                         cluster = "country_isocode", missing_within)

ind_occupation2 <- feols(missing_occupation ~ female_dummy, 
                         cluster = "country_isocode", missing_within)

ind_occupation3 <- feols(missing_class ~ prestige_high + prestige_med, 
                         cluster = "country_isocode", missing_within)

ind_politician1 <- feols(missing_politician ~ leader, 
                         cluster = "country_isocode", missing_within)

ind_politician2 <- feols(missing_politician ~ female_dummy, 
                         cluster = "country_isocode", missing_within)

ind_politician3 <- feols(missing_politician ~ prestige_high + prestige_med, 
                         cluster = "country_isocode", missing_within)


###################### Make table for individual results

models_ind <- list("(1A)" = ind_degree1,
                   "(1B)" = ind_degree2,
                   "(1C)" = ind_degree3,
                   "(2A)" = ind_class1,
                   "(2B)" = ind_class2,
                   "(2C)" = ind_class3,
                   "(3A)" = ind_occupation1,
                   "(3B)" = ind_occupation2,
                   "(3C)" = ind_occupation3,
                   "(4A)" = ind_politician1,
                   "(4B)" = ind_politician2,
                   "(4C)" = ind_politician3)



modelsummary(models_ind,
                        title = "Predicting missingness for individual-level data",
                        stars = c('*' = .05),
                        fmt = 2,
                        coef_map = c("leader1" = "Leader",
                                     "female_dummy" = "Female",
                                     "prestige_high" = "High Prestige",
                                     "prestige_med" = "Medium Prestige",
                                     "(Intercept)" = "Constant"),
                        gof_map = gm,
                        estimate = "{estimate}{stars}",
                        #output = "Output/missing_individual.png",
                        threeparttable=TRUE,
                        notes = list("Reference category for High and Medium Prestige is low-prestige positions. Standard errors are clustered by country. *p < 0.05."
                        ))


#write(reg_ind,"Output/tablee2.tex")



#Checking for missingness patterns in gender caused by time trends 
ind_degree_yearfix <- feols(missing_degree ~ female_dummy |year, 
                            cluster = "country_isocode", missing_within)


ind_class_yearfix <- feols(missing_class ~ female_dummy |year, 
                           cluster = "country_isocode", missing_within)

ind_occupation_yearfix <- feols(missing_occupation ~ female_dummy |year, 
                                cluster = "country_isocode", missing_within)

ind_politician_yearfix <- feols(missing_politician ~ female_dummy |year, 
                                cluster = "country_isocode", missing_within)


models_ind_yf <- list("Education" = ind_degree_yearfix,
                      "Occupation" = ind_occupation_yearfix,
                      "Politician" = ind_politician_yearfix,
                      "Class" = ind_class_yearfix)


modelsummary(models_ind_yf,
                        stars = c('*' = .05),
                        fmt = 2,
                        coef_map = c("female_dummy" = "Female",
                                     "(Intercept)" = "Constant"),
                        gof_map = gm,
                       estimate = "{estimate}{stars}",
                        title = "Predicting missingness for female ministers controlling for time",
                       # output = "Output/missing_female_yearfixed.html",
                        threeparttable=TRUE,
                        notes = list("Standard errors are clustered by country. *p < 0.05."
                        )) 
  


#write(reg_ind_yf,"Output/tablee3.tex")


#Dataviz for graph
plot_data <- missing_year %>% mutate(missing_degree = degree_missing*100,
                                     missing_class = classorigins_missing*100,
                                     missing_occupation = occupation_missing*100,
                                     missing_politician = politician_missing*100)


# Plot of missingness for four different variables
plot_data2 <- plot_data %>%
  select(year, missing_degree, missing_class, missing_occupation, missing_politician) %>%
  gather(key = "variable", value = "value", -year) %>% filter(year > 1965 & year < 2022) %>% 
  mutate(year = as.numeric(as.character(year)))


fourinone <- ggplot(plot_data2, aes(x = year, y = value)) + 
  geom_line(aes(color = variable)) + 
  scale_color_manual(values = c("#00008B", "#273046", "#999999","#CB2314"),
                     name= "Variables",
                     breaks = c("missing_class", "missing_degree", "missing_occupation", "missing_politician"),
                     labels=c("Class","Degree", "Occupation", "Political Experience")) +
  theme_classic() + theme(axis.title = element_blank()) +
  scale_x_continuous(breaks = c(1970, 1980, 1990, 2000, 2010, 2020)) +
  scale_y_continuous(breaks = c(10, 20, 30, 40, 50, 60),
                     labels = c("10%", "20%", "30%", "40%", "50%", "60%"),
                     limits = c(10, 65)) +
  ggtitle("% missingness by year")

ggsave("Output/figuree1.jpg",
       fourinone,
       width = 6,
       height = 4,
       dpi = 1200
)

###
# Appendix F - Extension of Besley and Reynal-Querol (2011)
###

df_appendixf <- df_panel %>% mutate(educationhigh = case_when(degree %in% c(1,2,3,4,5,6,7,10) ~ 0,
                                                              degree %in% c(8,9) ~ 1),
                                                              wenttouniversity =  case_when(degree %in% c(1,2,3,4,5) ~ 0,
                                                              degree %in% c(6,7,8,9,10) ~ 1))

df_leadereduc <- df_appendixf %>% filter(leader == 1) %>% select(year,country_isocode,leader_education = educationhigh, leader_wenttouniversity = wenttouniversity)

df_ministereduc <- df_appendixf %>% 
  filter(core == 1 & leader == 0) %>% 
  group_by(year,country_isocode) %>%
  summarize(minister_education = mean(educationhigh,na.rm=TRUE),
            minister_wenttouniversity = mean(wenttouniversity,na.rm=TRUE))

df_appendixf_cross <- df_crosssectional %>% select(year,country_isocode) %>% mutate(year = as.character(year))

# Merge

df_appendixf_cross <- left_join(df_appendixf_cross,df_background,by=c("country_isocode","year")) %>%
              left_join(.,df_leadereduc,by=c("country_isocode","year")) %>%
              left_join(.,df_ministereduc,by=c("country_isocode","year")) %>% 
              mutate(year = as.numeric(year))



# Yearly data

df_yearly <- df_appendixf_cross %>% group_by(year,democracy_polity) %>% 
  summarize(mean_leader = mean(leader_education,na.rm=TRUE),
            mean_cabinet = mean(minister_education,na.rm=TRUE)) %>%
  filter(!is.na(democracy_polity) & year > 1965) %>%
  mutate(democracy_polity = as.factor(democracy_polity),
         year = as.numeric(year)) %>%
  pivot_longer(cols = c(mean_leader,mean_cabinet),
               names_to = "group",
               values_to = "share_educated")


###
# Table F1 ---
###

# Leader

reg1_leader <- feols(leader_education ~ democracy_polity + gdp_cap_pwt_ln | country_isocode + year, cluster = "country_isocode", data=df_appendixf_cross)

reg2_leader <- feols(leader_education ~ democracy_polity + gdp_cap_pwt_ln | country_isocode[year] + year, cluster = "country_isocode", data=df_appendixf_cross)

reg3_leader <- feols(leader_education ~ democracy_polity + gdp_cap_pwt_ln + bl_asymf | country_isocode[year] + year, cluster = "country_isocode", data=df_appendixf_cross)

# Minister

reg1_minister <- feols(minister_education ~ democracy_polity + gdp_cap_pwt_ln | country_isocode + year, cluster = "country_isocode", data=df_appendixf_cross)

reg2_minister <- feols(minister_education ~ democracy_polity + gdp_cap_pwt_ln | country_isocode[year] + year, cluster = "country_isocode", data=df_appendixf_cross)

reg3_minister <- feols(minister_education ~ democracy_polity + gdp_cap_pwt_ln + bl_asymf | country_isocode[year] + year, cluster = "country_isocode", data=df_appendixf_cross)

# Model summary

models <- list(
  "(1)" = reg1_leader,
  "(2)" = reg2_leader,
  "(3)" = reg3_leader,
  "(4)" = reg1_minister,
  "(5)" = reg2_minister,
  "(6)" = reg3_minister
)


gof_map <- data.frame(
  original = c("Num.Obs","R2","FE:country_isocode","FE:year"),
  new = c("Observations", "R2", "Country fixed-effects", "Year fixed-effects"),
  stringsAsFactors = FALSE
)

gm <- tibble::tribble(
  ~raw, ~clean, ~fmt,
  "nobs", "Observations", 0,
  "r.squared", "R^2", 2)

gl <- data.frame(
  "Country-specific time trends"," ", "Yes", "Yes", " ", "Yes", "Yes")

reg_tabf1 <- modelsummary(models,
                     stars = c('*' = .05),
                     fmt = 2,
                     coef_map = c("democracy_polity" = "Democracy",
                                  "gdp_cap_pwt_ln" = "Log (GDP per capita)",
                                  "bl_asymf" = "Average years of education",
                                  "(Intercept)" = "Constant"),
                     gof_map = gm,
                     add_rows = gl,
                     estimate = "{estimate}{stars}",
                     title = "Regime type and educational attainment: OLS regressions \\label{tab:education}",
                     output = "latex",
                     threeparttable=TRUE,
                     notes = list("We follow the specifications from Besley and Reynal-Querol (2011). All specifications include country and year dummies and are reported with standard errors clustered at the country level. Standard errors are in parentheses. The dependent variable is either whether the leader has a graduate degree or the share of ministers with a graduate degree. Democracy is a dummy variable that has value 1 if the
             polity2 score is higher than 0, and 0 otherwise. The full sample is a panel of 141 countries in the period 1966-2018. *p < 0.05."
                     ))

###
## Appendix: Went to university
###

# Yearly data

df_attendeduni <- df_appendixf_cross %>% group_by(year,democracy_polity) %>% 
  summarize(mean_leader = mean(leader_wenttouniversity,na.rm=TRUE),
            mean_cabinet = mean(minister_wenttouniversity,na.rm=TRUE)) %>%
  filter(!is.na(democracy_polity) & year > 1965) %>%
  mutate(democracy_polity = as.factor(democracy_polity)) %>%
  pivot_longer(cols = c(mean_leader,mean_cabinet),
               names_to = "group",
               values_to = "share_educated")

# Make graph

plot_wenttouniversity <- ggplot(data=df_attendeduni, aes(x=year, y=share_educated,
                                                         linetype = group,
                                                         color = democracy_polity)) +
  geom_line(size=1.1) +
  theme_bw() + 
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        legend.position = "right",
        legend.title = element_blank(),
        plot.title = element_text(size=9),
        axis.title.x = element_text(size=10),
        axis.title.y = element_text(size=10)) +
  scale_x_continuous(breaks=seq(1970,2021,10)) + 
  scale_y_continuous(labels=scales::percent_format(accuracy = 1), limits = c(0,1)) +
  ylab("") +
  xlab("") +
  labs(title = "% with high education") +
  scale_color_manual(values=c("#CB2314","#273046"),
                     name = "Regime",
                     labels = c("Autocracy", "Democracy")) +
  scale_linetype_manual(values = c("solid", "dotdash"),
                        labels = c("Cabinet", "Leader"))

plot_wenttouniversity

ggsave(
  "Output/figuref1.jpg",
  plot_wenttouniversity,
  width = 6,
  height = 4,
  dpi = 1200
)

###
# Table F2 ---
###

# Leader

reg1_leader_app <- feols(leader_wenttouniversity ~ democracy_polity + gdp_cap_pwt_ln | country_isocode + year, cluster = "country_isocode", data=df_appendixf_cross)

reg2_leader_app <- feols(leader_wenttouniversity ~ democracy_polity + gdp_cap_pwt_ln | country_isocode[year] + year, cluster = "country_isocode", data=df_appendixf_cross)

reg3_leader_app <- feols(leader_wenttouniversity ~ democracy_polity + gdp_cap_pwt_ln + bl_asymf | country_isocode[year] + year, cluster = "country_isocode", data=df_appendixf_cross)

# Minister

reg1_minister_app <- feols(minister_wenttouniversity ~ democracy_polity + gdp_cap_pwt_ln | country_isocode + year, cluster = "country_isocode", data=df_appendixf_cross)

reg2_minister_app <- feols(minister_wenttouniversity ~ democracy_polity + gdp_cap_pwt_ln | country_isocode[year] + year, cluster = "country_isocode", data=df_appendixf_cross)

reg3_minister_app <- feols(minister_wenttouniversity ~ democracy_polity + gdp_cap_pwt_ln + bl_asymf | country_isocode[year] + year, cluster = "country_isocode", data=df_appendixf_cross)

# Model summary

models_app <- list(
  "(1)" = reg1_leader_app,
  "(2)" = reg2_leader_app,
  "(3)" = reg3_leader_app,
  "(4)" = reg1_minister_app,
  "(5)" = reg2_minister_app,
  "(6)" = reg3_minister_app
)

reg_tabf2 <- modelsummary(models_app,
                       stars = c('*' = .05),
                       fmt = 2,
                       coef_map = c("democracy_polity" = "Democracy",
                                    "gdp_cap_pwt_ln" = "Log (GDP per capita)",
                                    "bl_asymf" = "Average years of education",
                                    "(Intercept)" = "Constant"),
                       gof_map = gm,
                       add_rows = gl,
                       estimate = "{estimate}{stars}",
                       title = "Regime type and educational attainment: OLS regressions \\label{tab:education}",
                       output = "latex",
                       threeparttable=TRUE,
                       notes = list("We follow the specifications from Besley and Reynal-Querol (2011). All specifications include country and year dummies and are reported with standard errors clustered at the country level. Standard errors are in parentheses. The dependent variable is either whether the leader has attended university or the share of ministers who have attended university. Democracy is a dummy variable that has value 1 if the
                     polity2 score is higher than 0, and 0 otherwise. The full sample is a panel of 131 countries in the period 1966-2018. *p < 0.05."
                       ))


###
# Appendix G - Intercode reliability test
###

# Found in " appendixG"-do file
